#' ---
#' title: "Functions: learning from Polls"
#' author: "Lukas F. Stoetzer"
#' date: "6 Sep 2018"
#' ---

# Elicited Beliefs Functions
#################################

  # Est_Eb_dy Manski ====
  
  est_dyn_manski <- function(resp.mat, # Response Matrix in Long Format 5 X N*T
                             time, # Vector with time points
                             polls, # Information about Polls results and number of respondents and 
                             start.vals = c(log(1),log(1),0,log(1),log(0.1)) # Starting Values
  ){
    
    # Estimate dynamic beliefs with a beta distribution 
    
    # v is a vector of size % containing mean, lower_bound, upper_bound, p_lower_bound, p_upper_bound
    if(ncol(resp.mat)!=5) stop('measures (in columns) needs to be of length 5')
    
    # Some Information
    T <- max(time) 
    N <- nrow(resp.mat)/T
    
    # Log-liklihood Function for one respondent
    log_lik_q <- function(theta,
                          q=resp.mat,
                          time,
                          polls){
      
      # Paramters
      alpha <- exp(theta[1]) # Positive alpha shape paramter of beta belief
      beta  <- exp(theta[2]) # Positive beta shape paramter of beta belief
      delta <- 1 / (1 + exp(-theta[3]))
      rho <- exp(theta[4])
      sigma <- exp(theta[5]) # Meaurment error standard deviation for mean
      
      
      # Calculate shape parameter over time 
      shapes_time <- function(polls,alpha0,beta0,d,rho){
        
        # Time Points
        time <- nrow(polls) 
        polls.y <- polls[,1] # 
        polls.N <- polls[,2]
        
        # Create shape paramters
        shapes <- matrix(NA, ncol=2,nrow=(time+1))
        shapes[1,] <- c(alpha0,beta0)
        
        # Calculate for each time point
        for(t in 2:(time+1)){
          # d <- ifelse(t==2, delta[1], delta[2]) 
          shapes[t,1] <- d*shapes[(t-1),1] +  polls.y[(t-1)] * rho * polls.N[(t-1)]
          shapes[t,2] <- d*shapes[(t-1),2] +  (1-polls.y[(t-1)]) * rho * polls.N[(t-1)]
        }
        
        # Return Shapes
        return(shapes)
        
      }
      
      # Calculate Shapes 
      shapes <- shapes_time(polls,alpha,beta,delta,rho)
      
      # Log-Liklihood
      log_lik_i <- sapply(1:nrow(q),function(i){
        
        # Get Shapes
        a <- shapes[(time[i]+1),1]
        b <- shapes[(time[i]+1),2]
        
        # Probabilities 
        prob <- q[i,4:5]
        
        # mean, lower_bound, upper_bound 
        mean <- a/(b + a)
        lower_bound <- qbeta(as.numeric(prob[1]),a,b,lower.tail = T)
        upper_bound <- qbeta(as.numeric(prob[2]),a,b,lower.tail = F)
        
        # Log_liklehood
        (dnorm(q[i,1], mean,sigma ,log=T) +
            dnorm(q[i,2], lower_bound,sigma,log=T) + 
            dnorm(q[i,3], upper_bound,sigma,log=T))
      })
      
      # Return 
      log_lik_i <- unlist(log_lik_i)
      return(sum(log_lik_i))
      
    }
    
    # Minmize Log-Liklihood
    res <- optim(par=start.vals,
                 fn=log_lik_q, 
                 hessian = T, control=list(fnscale=-1),
                 lower=c(0,0,-10,-10,-10),upper=c(10,10,10,3,5),
                 method = "L-BFGS-B", 
                 polls = polls, q=resp.mat, time=time)
    
   
    # Return Parameter estimates as vector
    names(res$par) <- c("alpha0","beta0","delta","rho","sigma")
    return(list("par"= c(exp(res$par[1:2]),1/(1 + exp(-res$par[3])),exp(res$par[4]),exp(res$par[5])),
                "log_lik"= res$value,
                "n" = N,
                "par_raw"=res$par,
                "vcov"= solve(-res$hessian)))
  }
  
  # Est Manski fixed priors ======
  
  est_dyn_manski_fp <- function(resp.mat, # Response Matrix in Long Format 5 X N*T
                             time, # Vector with time points
                             polls, # Information about Polls results and number of respondents and 
                             priors = c(1,1),
                             start.vals = c(0,log(1),log(0.1)) # Starting Values
  ){
    
    # Estimate dynamic beliefs with a beta distribution 
    
    # v is a vector of size % containing mean, lower_bound, upper_bound, p_lower_bound, p_upper_bound
    if(ncol(resp.mat)!=5) stop('measures (in columns) needs to be of length 5')
    
    # Some Information
    T <- max(time) 
    N <- nrow(resp.mat)/T
    
    # Log-liklihood Function for one respondent
    log_lik_q <- function(theta,
                          q=resp.mat,
                          time, priors,
                          polls){
      
      # Paramters
      delta <- 1/(1 + exp(-theta[1]))
      rho <- exp(theta[2])
      sigma <- exp(theta[3]) # Meaurment error standard deviation for mean

      # Priors
      alpha <- priors[1] # Positive alpha shape paramter of beta belief
      beta  <- priors[2] # Positive beta shape paramter of beta belief
      
      # Calculate shape parameter over time 
      shapes_time <- function(polls,alpha0,beta0,delta,rho){
        
        # Time Points
        time <- nrow(polls) 
        polls.y <- polls[,1] # 
        polls.N <- polls[,2]
        
        # Create shape paramters
        shapes <- matrix(NA, ncol=2,nrow=(time+1))
        shapes[1,] <- c(alpha0,beta0)
        
        # Calculate for each time point
        for(t in 2:(time+1)){
          d <- delta
          shapes[t,1] <- d*shapes[(t-1),1] +  polls.y[(t-1)] * rho * polls.N[(t-1)]
          shapes[t,2] <- d*shapes[(t-1),2] +  (1-polls.y[(t-1)]) * rho * polls.N[(t-1)]
        }
        
        # Return Shapes
        return(shapes)
        
      }
      
      # Calculate Shapes 
      shapes <- shapes_time(polls,alpha,beta,delta,rho)
      
      # Log-Liklihood
      log_lik_i <- sapply(1:nrow(q),function(i){
        
        # Get Shapes
        a <- shapes[(time[i]+1),1]
        b <- shapes[(time[i]+1),2]
        
        # Probabilities 
        prob <- q[i,4:5]
        
        # mean, lower_bound, upper_bound 
        mean <- a/(b + a)
        lower_bound <- qbeta(as.numeric(prob[1]),a,b,lower.tail = T)
        upper_bound <- qbeta(as.numeric(prob[2]),a,b,lower.tail = F)
        
        # Log_liklehood
        (dnorm(q[i,1], mean,sigma ,log=T) +
            dnorm(q[i,2], lower_bound,sigma,log=T) + 
            dnorm(q[i,3], upper_bound,sigma,log=T))
      })
      
      # Return 
      log_lik_i <- unlist(log_lik_i)
      return(sum(log_lik_i))
      
    }
    
    # Minmize Log-Liklihood
    res <- optim(par=start.vals,
                 fn=log_lik_q, 
                 hessian = T, control=list(fnscale=-1),
                 lower=c(-10,-10,-10),upper=c(10,3,5),
                 method = "L-BFGS-B", 
                 polls = polls, q=resp.mat, time=time,priors=priors)
    
    
    # Return Parameter estimates as vector
    est <- c(priors,1/(1 + exp(-res$par[1])),exp(res$par[2]),exp(res$par[3]))
    names(est) <- c("alpha0","beta0","delta","rho","sigma")
    return(list("par"= est,
                "log_lik"= res$value,
                "n" = N,
                "par_raw"=res$par,
                "vcov"= solve(-res$hessian))
           )
  }
  
  # Est_Eb Manski  =======
  
  est_eB_beta_manski <- function(resp.mat,
                                 start.vals = c(log(1),log(1),log(0.1)) # Starting Values
  ){
    
    # Estimate elicited Beliefs with a beta distribution 
    # from a matrix of interval responses (y_mean,y_low,p_high,p_low,p_high)
    # and information about cut.offs (c.low,c.high)
    # normal model for measurment 
    # y_mean ~ N(mu_1,sigma_1); 
    # y_low ~ N(mu_2,sigma_1); y_high ~ N(mu_3,sigma_1); 
    # probabilities are fixed (otheriwse complicated)
    # mu_1 = alpha/( alpha + beta), 
    # mu_4 = Q(p_low, alpha, beta), mu_3 = 1-Q(p_high, alpha, beta)
    # where Q ist the CDF of the Beta distribution
    
    # v is a vector of size % containing mean, lower_bound, upper_bound, p_lower_bound, p_upper_bound
    if(ncol(resp.mat)!=5) stop('measures (in columns) needs to be of length 5')
    
    
    # Log-liklihood Function for one respondent
    log_lik_q <- function(theta,
                          null=FALSE,
                          q=resp.mat){
      
      # Paramters # Model for estimating paramters of beta
        alpha <- exp(theta[1]) # Positive alpha shape paramter of beta belief
        beta  <- exp(theta[2]) # Positive beta shape paramter of beta belief
        sigma <- exp(theta[3]) # Meaurment error standard deviation for mean
     
      # Log-Liklihood
      log_lik_i <- apply(q,1,function(dat){
        
        # Probabilities 
        prob <- dat[4:5]
        
        # mean, lower_bound, upper_bound 
        mean <- alpha/(beta + alpha)
        lower_bound <- qbeta(as.numeric(prob[1]),alpha,beta,lower.tail = T)
        upper_bound <- qbeta(as.numeric(prob[2]),alpha,beta,lower.tail = F)
        
        # Log_liklehood
        (dnorm(dat[1], mean,sigma ,log=T) +
            dnorm(dat[2], lower_bound,sigma,log=T) + 
            dnorm(dat[3], upper_bound,sigma,log=T))
      })
      
      # Return 
      return(sum(log_lik_i))
      
    }
    
    # Minmize Log-Liklihood
    res <- optim(par=start.vals,
                 fn=log_lik_q, 
                 hessian = T, control=list(fnscale=-1),
                 lower=c(0,0,-10),upper=c(10,10,5),
                 method = "L-BFGS-B")
    

    # Return Parameter estimates as vector
    names(res$par) <- c("alpha","beta","sigma")
    return(list("par"= exp(res$par),
                "log_lik"= res$value,
                "n" = nrow(resp.mat)))
  }  
  

# Est_Eb_dy Manski polls as data ====
  
  est_dyn_manski_polls <- function(resp.mat, # Response Matrix in Long Format 5 X N*T
                             priors, # Fixed priors (matrix NX2)
                             time, # Vector with time points
                             id, # Vector of respondents id row number in resp frame
                             polls.Y, polls.N,  # Information about Polls results and number of respondents and 
                             start.vals = c(0,log(1),log(0.1)) # Starting Values
  ){
    
    # Estimate dynamic beliefs with a beta distribution 
    
    # v is a vector of size % containing mean, lower_bound, upper_bound, p_lower_bound, p_upper_bound
    if(ncol(resp.mat)!=5) stop('measures (in columns) needs to be of length 5')
    
    # Some Information
    T <- max(time) 
    N <- nrow(resp.mat)/(T)
    
    # Log-liklihood Function for one respondent
    log_lik_q <- function(theta,
                          q=resp.mat,
                          priors,
                          time,
                          polls.Y, polls.N){
      
      # Paramters
      delta <- 1 / (1 + exp(-theta[1]))
      rho <- exp(theta[2])
      sigma <- exp(theta[3]) # Meaurment error standard deviation for mean
      
      
      # Calculate shape parameter over time 
      shapes_time <- function(polls.y,polls.N,d,rho){

        # Create shape paramters
        alpha <- matrix(NA, ncol=(T+1),nrow=N)
        beta <- matrix(NA, ncol=(T+1),nrow=N)
        alpha[,1] <- priors[,1]
        beta[,1] <- priors[,2]
        
        # Calculate for each time point
        for(t in 2:(T+1)){
          alpha[,t] <- d*alpha[,(t-1)] +  polls.y[,(t)] * rho * polls.N
          beta[,t] <- d*beta[,(t-1)] +  (1-polls.y[,(t)]) * rho * polls.N
        }
        
        # Return Shapes
        return(list(alpha,beta))
        
      }
      
      # Calculate Shapes 
      shapes <- shapes_time(polls.Y,polls.N, # Poll Data
                            delta,rho # paramters priors, delta, rho
                            )
      
      
      # Create row id
      id <- sort(rep(1:(nrow(q)/(T)),(T)))

      # Log-Liklihood
      log_lik_i <- sapply(1:nrow(q),function(i){
        
        # Get Shapes
        a <- shapes[[1]][id[i],(time[i]+1)]
        b <- shapes[[2]][id[i],(time[i]+1)]
        
        # Probabilities 
        prob <- q[i,4:5]
        
        # mean, lower_bound, upper_bound 
        mean <- a/(b + a)
        lower_bound <- qbeta(as.numeric(prob[1]),a,b,lower.tail = T)
        upper_bound <- qbeta(as.numeric(prob[2]),a,b,lower.tail = F)
        
        # Log_liklehood
        (dnorm(q[i,1], mean,sigma ,log=T) +
            dnorm(q[i,2], lower_bound,sigma,log=T) + 
            dnorm(q[i,3], upper_bound,sigma,log=T))
      })
      
      # Return 
      log_lik_i <- unlist(log_lik_i)
      return(sum(log_lik_i))
      
    }
    
    # Minmize Log-Liklihood
    res <- optim(par=start.vals,
                 fn=log_lik_q, 
                 hessian = T, control=list(fnscale=-1),
                 lower=c(-10,-10,-10),upper=c(10,3,5),
                 method = "L-BFGS-B", priors = priors,
                 polls.Y = polls.Y, polls.N = 1000, 
                 q=resp.mat, time=time)
    
    
    # Return Parameter estimates as vector
    names(res$par) <- c("delta","rho","sigma")
    return(list("par"= c(1/(1 + exp(-res$par[1])),exp(res$par[2]),exp(res$par[3])),
                "log_lik"= res$value,
                "n" = N,
                "par_raw"=res$par,
                "vcov"= solve(-res$hessian)))
  }
  
  
  
 